import pandas as pd
import seaborn as sns
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline
import warnings
# Silence all warnings for the rest of the script (pandas/plotly deprecation
# noise included).
warnings.filterwarnings('ignore')

# Read the Mapbox token through a context manager so the file handle is
# closed deterministically; strip the trailing newline editors usually add.
with open(".mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# Colours used across the plots: confirmed, deaths, recovered, active.
cnf, dth, rec, act = '#ff2e63', '#a9a9a9', '#21bf73', '#fe9801'
# Load the time series; the 'Date' column is parsed to datetimes on read.
whole = pd.read_csv("covid_19_clean_complete.csv", parse_dates=['Date'])
whole.head()  # preview expression; its result is discarded when run as a script

# Positional rename: this assigns eight names, so the CSV must have exactly
# eight columns in this order (pandas raises on a length mismatch).
whole.columns = [
    'State', 'Country', 'Lat', 'Long', 'Date',
    'Confirmed', 'Deaths', 'Recovered',
]
whole.isnull().sum()  # inspection only
whole.shape  # inspection only

# Derive the active count, then zero-fill every count column that is missing.
whole['Active'] = whole['Confirmed'] - whole['Deaths'] - whole['Recovered']
whole[['Confirmed', 'Deaths', 'Recovered', 'Active']] = (
    whole[['Confirmed', 'Deaths', 'Recovered', 'Active']].fillna(0)
)

# Rows without a state/province get an empty string instead of NaN.
whole['State'] = whole['State'].fillna('')
whole.dtypes  # inspection only
# Stacked area chart of the per-date totals of recovered, deaths and active.
# The columns are selected with a list: selecting several columns from a
# GroupBy with a bare tuple (['a', 'b'] without the inner brackets) was
# deprecated in pandas 1.0 and removed in pandas 2.0.
num_cases = whole.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()

# Reshape to long form (one row per date and case type) for px.area.
num_cases = num_cases.melt(
    id_vars="Date",
    value_vars=['Recovered', 'Deaths', 'Active'],
    var_name='Case',
    value_name='Count',
)
fig = px.area(
    num_cases, x='Date', y='Count', color='Case', width=600, height=600,
    labels={'Count': 'No. of cases'}, title='No. of cases Vs Time',
    color_discrete_sequence=[rec, dth, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
# Day-over-day change of the per-date confirmed total.
# The per-date total is computed once (the original ran the same groupby
# three times) and differenced with Series.diff() instead of subtracting two
# whole frames, which also subtracted the 'Date' columns.
num_cases = whole.groupby('Date')['Confirmed'].sum().reset_index()

# Previous-day frame, kept under its original name.
num_cases_shift = num_cases.shift(1)

# The first date has no predecessor, so its increase is reported as 0.
num_cases['Confirmed'] = num_cases['Confirmed'].diff().fillna(0)

fig = px.bar(
    num_cases, x='Date', y='Confirmed', width=600, height=600,
    labels={'Confirmed': 'No. of confirmed cases'},
    title='Increase in no. of cases on daily basis',
    color_discrete_sequence=[cnf],
)
fig.show()
# Number of countries with a non-zero active count per date, plus its
# day-over-day change.
temp = whole[whole['Active'] != 0]

# 'nunique' counts each country once per date. The previous 'count' counted
# rows, so a country with several 'State' rows was counted once per state
# even though the plots are labelled "No. of countries".
num_country = temp.groupby('Date').agg({'Country': 'nunique'}).reset_index()

# Previous-day frame, kept under its original name.
num_country_shift = num_country.shift(1)

# Daily change; the first date has no predecessor and is reported as 0.
num_country_plt = num_country.copy()
num_country_plt['Country'] = num_country['Country'].diff().fillna(0)

fig = px.bar(
    num_country_plt, x='Date', y='Country', width=600, height=600,
    labels={'Country': 'No. of countries'},
    title='Increase/decrease in no. of countries <br>with covid-19 cases on daily basis',
    color_discrete_sequence=[dth],
)
fig.show()

fig = px.bar(
    num_country, x='Date', y='Country', width=600, height=600,
    labels={'Country': 'No. of countries'},
    title='No. of countries with covid-19 cases Vs Time',
    color_discrete_sequence=[dth],
)
fig.show()
# Donut chart of the ten countries with the most confirmed cases.
# This script differences the per-date confirmed total to obtain the daily
# increase, i.e. it treats 'Confirmed' as a running total. Summing it over
# every date would count early cases once per subsequent day, so the ranking
# uses the most recent date only.
top_10 = (
    whole[whole['Date'] == whole['Date'].max()]
    .groupby('Country')['Confirmed']
    .sum()
    .reset_index()
)
top_10 = top_10.sort_values('Confirmed', ascending=False).iloc[:10, :]
labels = top_10.Country

# A single 'domain' subplot is what pie traces require.
fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'domain'}]])
fig.add_trace(go.Pie(labels=labels, values=top_10.Confirmed), 1, 1)
fig.update_traces(hole=.4, hoverinfo="label+percent")
fig.update_layout(
    title_text="Top 10 countries with most Covid-19 cases",
    # Text placed in the middle of the donut hole.
    annotations=[dict(text='Covid-19 cases<br>distribution', x=0.5, y=0.5,
                      font_size=20, showarrow=False)],
    autosize=False,
    width=700,
    height=700,
)
fig.show()
# Top-10 countries per metric on a fixed date, drawn as a 2x2 grid of bars.
# The date filter is applied once instead of once per metric.
# NOTE(review): the date is hard-coded; if the CSV has no rows for it, all
# four tables are empty. Confirm whether the latest date is wanted instead.
snapshot = whole[whole.Date == '2020-04-04']

top_10_rec = (snapshot.groupby('Country').Recovered.sum().reset_index()
              .sort_values('Recovered', ascending=False).iloc[:10])
top_10_act = (snapshot.groupby('Country').Active.sum().reset_index()
              .sort_values('Active', ascending=False).iloc[:10])
top_10_dead = (snapshot.groupby('Country').Deaths.sum().reset_index()
               .sort_values('Deaths', ascending=False).iloc[:10])
top_10_con = (snapshot.groupby('Country').Confirmed.sum().reset_index()
              .sort_values('Confirmed', ascending=False).iloc[:10])

# One bar figure per metric; only the first trace of each is reused below.
fig_rec = px.bar(top_10_rec, x='Country', y='Recovered', width=600, height=600,
                 title='No. of countries with most recoveries',
                 color_discrete_sequence=[rec], text='Recovered')
fig_rec.update_traces(texttemplate='%{text:.2s}', textposition='outside')

fig_act = px.bar(top_10_act, x='Country', y='Active', width=600, height=600,
                 title='No. of countries with most active cases',
                 color_discrete_sequence=[act], text='Active')
fig_act.update_traces(texttemplate='%{text:.2s}', textposition='outside')

fig_dead = px.bar(top_10_dead, x='Country', y='Deaths', width=600, height=600,
                  title='No. of countries with most deaths',
                  color_discrete_sequence=[dth], text='Deaths')
fig_dead.update_traces(texttemplate='%{text:.2s}', textposition='outside')

fig_con = px.bar(top_10_con, x='Country', y='Confirmed', width=600, height=600,
                 title='No. of countries with most confirmed cases',
                 color_discrete_sequence=[cnf], text='Confirmed')
fig_con.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Combine the four bar traces into one figure.
fig = make_subplots(
    rows=2, cols=2, shared_xaxes=False,
    horizontal_spacing=0.14, vertical_spacing=0.14,
    subplot_titles=('Recovered', 'Active cases', 'Deaths reported', 'Confirmed cases'),
)
fig.add_trace(fig_rec['data'][0], row=1, col=1)
fig.add_trace(fig_act['data'][0], row=1, col=2)
fig.add_trace(fig_dead['data'][0], row=2, col=1)
fig.add_trace(fig_con['data'][0], row=2, col=2)
fig.update_layout(height=1000)
# The combined figure was built but never displayed; every other figure in
# this script is shown explicitly, so do the same here.
fig.show()
def first_n(df, n):
    """Find when the total confirmed count of ``df`` first reaches ``n``.

    Rows that share a date (e.g. one row per state/province) are summed into
    a single per-date total before the threshold is tested, and dates are
    processed in chronological order regardless of the row order of ``df``.
    Previously each row was tested on its own and the first/last dates were
    taken from row position, so multi-row groups and unsorted input gave
    results that did not describe the group as a whole.

    Args:
        df: DataFrame with a datetime ``Date`` column and a numeric
            ``Confirmed`` column.
        n: Threshold the per-date confirmed total must reach (``>= n``).

    Returns:
        A 4-tuple ``(init_date, fin_date, days, confirmed)``: the earliest
        date in ``df``, the first date whose total is at least ``n``, the
        whole days between them, and the total on that date.
        ``(0, 0, 0, 0)`` is returned when ``df`` has no rows or the
        threshold is never reached. A threshold met on the very first date
        also has ``days == 0``; use ``fin_date`` to tell the cases apart.
    """
    # groupby sorts by key by default; sort_index makes the ordering explicit.
    daily_total = df.groupby('Date')['Confirmed'].sum().sort_index()
    reached = daily_total[daily_total >= n]
    if daily_total.empty or reached.empty:
        return 0, 0, 0, 0

    init_date = daily_total.index[0]
    fin_date = reached.index[0]
    return init_date, fin_date, (fin_date - init_date).days, reached.iloc[0]
def _days_to_threshold(threshold):
    """Build the per-country table of days taken to reach ``threshold`` cases.

    Applies ``first_n`` to each country's rows of ``whole`` and expands the
    returned 4-tuples into ``init_date``, ``fin_date``, ``days`` and
    ``Confirmed`` columns. Rows with ``days == 0`` are dropped and the rest
    are sorted by ``days`` ascending.
    """
    per_country = whole.groupby('Country').apply(first_n, threshold)
    table = pd.DataFrame({'Country': per_country.index, 'Val': per_country.values})
    # Split each tuple into its own four columns, aligned on the row index.
    table[['init_date', 'fin_date', 'days', 'Confirmed']] = pd.DataFrame(
        table['Val'].tolist(), index=table.index)
    table = table.drop('Val', axis=1)
    # NOTE(review): first_n signals "never reached" with all zeros, so this
    # filter also drops countries at or above the threshold on day 0.
    return table[table.days != 0].sort_values('days')


# The three tables were built by three copies of the same statements; they
# now share one helper. The discarded `.sample(4)` previews were removed:
# DataFrame.sample raises ValueError when fewer than four rows are left.
hun_cnf = _days_to_threshold(100)
fig = px.scatter(hun_cnf, x='Country', y='days', color='Confirmed',
                 title='No. of days to reach 100 or more cases')
fig.show()

fhun_cnf = _days_to_threshold(500)
fig = px.scatter(fhun_cnf, x='Country', y='days', color='Confirmed',
                 title='No. of days to reach 500 or more cases')
fig.show()

th_cnf = _days_to_threshold(1000)
fig = px.scatter(th_cnf, x='Country', y='days', color='Confirmed',
                 title='No. of days to reach 1000 or more cases')
fig.show()
# One line chart per metric, one line per country, summed over the rows
# that share a (Country, Date) pair. `a` is reused as the scratch frame
# for each chart.
country_by_date = whole.groupby(['Country', 'Date'])

a = country_by_date['Recovered'].sum().reset_index()
fig_rec = px.line(a, x='Date', y='Recovered', color='Country',
                  title='Recovered cases', height=600)
fig_rec.show()

a = country_by_date['Active'].sum().reset_index()
fig_act = px.line(a, x='Date', y='Active', color='Country',
                  title='Active cases', height=600)
fig_act.show()

a = country_by_date['Deaths'].sum().reset_index()
fig_dead = px.line(a, x='Date', y='Deaths', color='Country',
                   title='Deaths cases', height=600)
fig_dead.show()

a = country_by_date['Confirmed'].sum().reset_index()
fig_con = px.line(a, x='Date', y='Confirmed', color='Country',
                  title='Confirmed cases', height=600)
fig_con.show()
def single_day_apply(df, feat):
    """Return the largest day-over-day increase of ``feat`` in ``df``.

    Rows sharing a date are summed first, then consecutive per-date totals
    are differenced and the maximum difference is returned.

    ``Series.diff()`` replaces ``rolling(window=2).apply(lambda x: x[1]-x[0])``.
    That lambda received a date-indexed Series and looked up the integer
    keys 0 and 1 on it, which pandas has deprecated for non-integer indexes.

    Args:
        df: DataFrame with a ``Date`` column and a numeric ``feat`` column.
        feat: Name of the column to measure.

    Returns:
        The maximum one-day change as a float, or NaN when ``df`` covers
        fewer than two distinct dates.
    """
    daily_total = df.groupby('Date')[feat].sum()
    return daily_total.diff().max()
def one_day_jump(df, feat, col):
    """Build a styled table of the 20 countries with the largest one-day jump.

    Args:
        df: DataFrame with ``Country`` and ``Date`` columns plus the numeric
            ``feat`` column.
        feat: Name of the column whose largest daily increase is ranked.
        col: Colour passed to the in-cell bar of the ``Count`` column.

    Returns:
        A pandas ``Styler`` for the top 20 rows with a caption, the index
        hidden and a bar drawn in the ``Count`` column.
    """
    jumps = df.groupby('Country').apply(single_day_apply, feat)
    one_day_df = pd.DataFrame(
        {'Country': jumps.index, 'Count': jumps.values}
    ).sort_values('Count', ascending=False)

    styler = one_day_df.head(20).style.set_caption(
        'Top 20 one day jump in ' + feat + ' cases')
    # Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in
    # favour of Styler.hide(axis="index"); fall back for older pandas.
    if hasattr(styler, 'hide'):
        styler = styler.hide(axis='index')
    else:
        styler = styler.hide_index()
    return styler.bar(subset=["Count"], color=col)
# Build the top-20 one-day-jump table for each metric, in the metric's own
# colour. Each call gets its own copy of `whole`; the returned Styler is not
# kept, so nothing is displayed when this runs as a plain script.
for _feature, _colour in (
    ('Confirmed', cnf),
    ('Deaths', dth),
    ('Recovered', rec),
    ('Active', act),
):
    one_day_jump(whole.copy(), _feature, _colour)
# Bubble map of confirmed cases per country on the most recent date.
temp = whole[whole['Date'] == whole['Date'].max()]

# Columns are selected with a list: tuple-style selection on a GroupBy
# (['a', 'b', 'c'] without the inner brackets) was removed in pandas 2.0.
latest_country = temp.groupby('Country')[['Recovered', 'Confirmed', 'Deaths']].sum().reset_index()

fig = px.scatter_geo(
    latest_country, locations="Country", locationmode='country names',
    # log1p instead of log: log(0) is -inf and log of a value below 1 is
    # negative, neither of which is a usable marker size; log1p(0) is 0.
    size=np.log1p(latest_country.Confirmed),
    color='Confirmed',
    title='Confirmed cases around the globe',
    projection="natural earth", hover_name='Country',
    color_continuous_scale=px.colors.sequential.Sunsetdark,
)
fig.show()
# Animated choropleth of log(confirmed) per country, one frame per date.
# Columns are selected with a list: tuple-style selection on a GroupBy was
# removed in pandas 2.0.
by_country = (
    whole.groupby(['Date', 'Country'])[['Recovered', 'Deaths', 'Confirmed', 'Active']]
    .sum()
    .reset_index()
)

fig = px.choropleth(
    by_country, locations="Country", locationmode='country names',
    # Zero counts are masked to NaN first so np.log never receives 0, which
    # would yield -inf as the colour value.
    color=np.log(by_country["Confirmed"].where(by_country["Confirmed"] > 0)),
    hover_name="Country",
    # Frames are keyed by the date formatted as day-month-year text.
    animation_frame=by_country["Date"].dt.strftime('%d-%m-%Y'),
    title='Animation of increase/decrease of cases on daily basis',
    color_continuous_scale=px.colors.sequential.Mint,
)
fig.update(layout_coloraxis_showscale=False)
fig.show()
# Mapbox bubble map of confirmed cases for the rows of `temp` whose country
# is China, one marker per row, labelled by 'State' on hover.
loc = temp[temp.Country == 'China']
fig = px.scatter_mapbox(
    loc, lat=loc.Lat, lon=loc.Long,
    # log1p instead of log: a row with 0 confirmed cases would give -inf,
    # which is not a usable marker size, and log(1) gives a size of 0.
    size=np.log1p(loc.Confirmed),
    color='Confirmed', title='Distribution of confirmed cases in China',
    hover_name='State',
    color_continuous_scale=px.colors.sequential.Sunsetdark,
)
fig.show()
# Mapbox bubble map of confirmed cases for the rows of `temp` whose country
# is Australia, one marker per row, labelled by 'State' on hover.
loc = temp[temp.Country == 'Australia']
fig = px.scatter_mapbox(
    loc, lat=loc.Lat, lon=loc.Long,
    # log1p instead of log: a row with 0 confirmed cases would give -inf,
    # which is not a usable marker size, and log(1) gives a size of 0.
    size=np.log1p(loc.Confirmed),
    title='Distribution of confirmed cases in Australia',
    color='Confirmed',
    hover_name='State',
    color_continuous_scale=px.colors.sequential.Sunsetdark,
)
fig.show()
# Population per Australian state/territory, joined to the rows currently in
# `loc` so population can be compared with confirmed cases.
raw = {
    'State': [
        'New South Wales', 'Victoria', 'Queensland', 'Western Australia',
        'South Australia', 'Tasmania', 'Australian Capital Territory',
        'Northern Territory',
    ],
    'Population': [
        7317500, 5640900, 4599400, 2366900,
        1659800, 511000, 366900, 231200,
    ],
}
aus_pop = pd.DataFrame(raw)

# Left join: every state listed above is kept even when `loc` has no
# matching 'State' row (its case columns are then NaN).
aus_pop = pd.merge(aus_pop, loc, on='State', how='left')

population_vs_confirmed = aus_pop[['Population', 'Confirmed']]
fig = px.parallel_coordinates(population_vs_confirmed, width=600, height=600)
fig.show()